# Start from an empty global environment. Note that rm() only removes objects;
# attached packages and options() are left as they were.
rm(list = ls())

# Load datasets.
# stringsAsFactors = TRUE is spelled out because the read.csv() default changed
# to FALSE in R 4.0.0. The `levels<-` calls below only relabel factors; on a
# character column they would just attach an unused "levels" attribute.
train1 <- read.csv("train_dataset01.csv", stringsAsFactors = TRUE)
train2 <- read.csv("train_dataset02.csv", stringsAsFactors = TRUE)
test <- read.csv("test_dataset.csv", stringsAsFactors = TRUE)

# Give the listed STATUS_* factors the label set c("False", "True").
# NOTE(review): `levels<-` relabels by position, so this assumes the existing
# first level of each column corresponds to "False" - TODO confirm on the data.
status_labels <- c("False", "True")
for (col in c("STATUS_PU3", "STATUS_PU5", "STATUS_PU8", "STATUS_PU9")) {
  levels(train2[[col]]) <- status_labels
}
for (col in c("STATUS_PU8", "STATUS_PU9")) {
  levels(test[[col]]) <- status_labels
}

Split train2 into training (70%) and validation (30%) sets

# sample.split() comes from caTools
library(caTools)

# Fix the RNG state so the same rows are selected on every run
set.seed(100)

# Logical vector over the rows of train2, built from the ATT_FLAG labels with
# SplitRatio = 0.7; TRUE rows go to the training part, FALSE rows to validation
spl <- sample.split(train2$ATT_FLAG, SplitRatio = 0.7)
attackTrain <- subset(train2, spl)
attackTest <- subset(train2, !spl)

Train and validate (Random Forest)

# Random forest fitted on the training split
library(randomForest)

# Drop DATETIME first so the `ATT_FLAG ~ .` formula does not use it as a
# predictor
attackTrain[["DATETIME"]] <- NULL
model1 <- randomForest(ATT_FLAG ~ ., data = attackTrain)

# Lists the components stored in the fitted object
summary(model1)
                Length Class  Mode     
call                3  -none- call     
type                1  -none- character
predicted       12686  factor numeric  
err.rate         1500  -none- numeric  
confusion           6  -none- numeric  
votes           25372  matrix numeric  
oob.times       12686  -none- numeric  
classes             2  -none- character
importance         43  -none- numeric  
importanceSD        0  -none- NULL     
localImportance     0  -none- NULL     
proximity           0  -none- NULL     
ntree               1  -none- numeric  
mtry                1  -none- numeric  
forest             14  -none- list     
y               12686  factor numeric  
test                0  -none- NULL     
inbag               0  -none- NULL     
terms               3  terms  call     
# Variable-importance plot for model1
varImpPlot(model1)

# Predict on the held-out split, then cross-tabulate:
# rows = predicted class, columns = ATT_FLAG recorded in attackTest
predict1 <- predict(model1, newdata = attackTest)
cm <- table(predict1, attackTest[["ATT_FLAG"]])
cm
        
predict1 False True
   False  4844   40
   True      3  550
# cm has predictions in rows and recorded labels in columns; index 2 is the
# second class in each dimension, treated here as the positive class.
# precision = cell [2, 2] over everything predicted as class 2
precision <- cm[2, 2] / sum(cm[2, ])
# recall = cell [2, 2] over everything recorded as class 2
recall <- cm[2, 2] / sum(cm[, 2])
# F1 = harmonic mean of precision and recall
f1 <- (2 * precision * recall) / (precision + recall)
precision
[1] 0.994575
recall
[1] 0.9322034
f1
[1] 0.9623797

Train on whole train2 and predict test

# Refit on all of train2; DATETIME is removed so that `ATT_FLAG ~ .` does not
# treat it as a predictor
train2[["DATETIME"]] <- NULL
model2 <- randomForest(ATT_FLAG ~ ., data = train2)

# Lists the components stored in the fitted object
summary(model2)
                Length Class  Mode     
call                3  -none- call     
type                1  -none- character
predicted       18123  factor numeric  
err.rate         1500  -none- numeric  
confusion           6  -none- numeric  
votes           36246  matrix numeric  
oob.times       18123  -none- numeric  
classes             2  -none- character
importance         43  -none- numeric  
importanceSD        0  -none- NULL     
localImportance     0  -none- NULL     
proximity           0  -none- NULL     
ntree               1  -none- numeric  
mtry                1  -none- numeric  
forest             14  -none- list     
y               18123  factor numeric  
test                0  -none- NULL     
inbag               0  -none- NULL     
terms               3  terms  call     
# Variable-importance plot for model2
varImpPlot(model2)

# Predicted ATT_FLAG for every row of the test set
predict2 <- predict(model2, newdata = test)

Inspect the predictions on the test set

# Store the model2 predictions in the test set so they can be plotted next to
# the other columns.
test$ATT_FLAG <- predict2
# Not used by the plotting code below; kept in case other code relies on it.
test.ts <- ts(test)

# Columns left out of the plots.
ignore <- c(
  "LEVEL_T5", "FLOW_PU3", "FLOW_PU5", "FLOW_PU9",
  "STATUS_PU3", "STATUS_PU5", "STATUS_PU8", "STATUS_PU9"
)

# Select with a logical mask instead of `-which(...)`: when no name matches,
# `-which(...)` is integer(0) and would drop every column instead of none.
# drop = FALSE keeps a data frame even if a single column remains.
test.small <- test[, !(names(test) %in% ignore), drop = FALSE]
test.small.ts <- ts(test.small)

# One plot per remaining column, with ATT_FLAG drawn over it in red.
for (col in colnames(test.small.ts)) {
  # `&&` because this is a scalar condition inside `if`
  if (col != "DATETIME" && col != "ATT_FLAG") {
    plot.ts(test.small.ts[, col], ylab = col, col = "black")
    # par(new = TRUE) makes the next plot call draw on the current figure
    # instead of starting a new one
    par(new = TRUE)
    # Overlay is drawn without axes or labels, so it uses its own y-scale
    plot.ts(
      test.small.ts[, "ATT_FLAG"],
      axes = FALSE, bty = "n", xlab = "", ylab = "", col = "red"
    )
  }
}

LS0tCnRpdGxlOiAiUmFuZG9tIEZvcmVzdCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgUmVtb3ZlIGFsbCB2YXJpYWJsZXMgZnJvbSB0aGUgUiBlbnZpcm9ubWVudCB0byBjcmVhdGUgYSBmcmVzaCBzdGFydApybShsaXN0PWxzKCkpCgojIExvYWQgZGF0YXNldHMKdHJhaW4xIDwtIHJlYWQuY3N2KCJ0cmFpbl9kYXRhc2V0MDEuY3N2IikKdHJhaW4yIDwtIHJlYWQuY3N2KCJ0cmFpbl9kYXRhc2V0MDIuY3N2IikKdGVzdCA8LSByZWFkLmNzdigidGVzdF9kYXRhc2V0LmNzdiIpCgpsZXZlbHModHJhaW4yJFNUQVRVU19QVTMpIDwtIGMoIkZhbHNlIiwgIlRydWUiKQpsZXZlbHModHJhaW4yJFNUQVRVU19QVTUpIDwtIGMoIkZhbHNlIiwgIlRydWUiKQpsZXZlbHModHJhaW4yJFNUQVRVU19QVTgpIDwtIGMoIkZhbHNlIiwgIlRydWUiKQpsZXZlbHModHJhaW4yJFNUQVRVU19QVTkpIDwtIGMoIkZhbHNlIiwgIlRydWUiKQoKbGV2ZWxzKHRlc3QkU1RBVFVTX1BVOCkgPC0gYygiRmFsc2UiLCAiVHJ1ZSIpCmxldmVscyh0ZXN0JFNUQVRVU19QVTkpIDwtIGMoIkZhbHNlIiwgIlRydWUiKQpgYGAKClNwbGl0CmBgYHtyfQpsaWJyYXJ5KGNhVG9vbHMpCnNldC5zZWVkKDEwMCkKc3BsIDwtc2FtcGxlLnNwbGl0KHRyYWluMiRBVFRfRkxBRywgU3BsaXRSYXRpbyA9MC43KQphdHRhY2tUcmFpbiA8LSBzdWJzZXQodHJhaW4yLCBzcGwgPT0gVFJVRSkKYXR0YWNrVGVzdCA8LSBzdWJzZXQodHJhaW4yLCBzcGwgPT0gRkFMU0UpCmBgYAoKVHJhaW4gYW5kIHZhbGlkYXRlIChDQVJUKQpgYGB7cn0KIyBSYW5kb20gRm9yZXN0CmxpYnJhcnkocmFuZG9tRm9yZXN0KQoKIyBCdWlsZCB0aGUgbW9kZWwKYXR0YWNrVHJhaW4kREFURVRJTUUgPC0gTlVMTAptb2RlbDEgPC0gcmFuZG9tRm9yZXN0KEFUVF9GTEFHfi4sIGRhdGE9YXR0YWNrVHJhaW4pCnN1bW1hcnkobW9kZWwxKQp2YXJJbXBQbG90KG1vZGVsMSkKCiMgUHJlZGljdGlvbgpwcmVkaWN0MSA8LSBwcmVkaWN0KG1vZGVsMSwgbmV3ZGF0YT1hdHRhY2tUZXN0KQpjbSA8LSB0YWJsZShwcmVkaWN0MSwgYXR0YWNrVGVzdCRBVFRfRkxBRykKY20KCnByZWNpc2lvbiA8LSBjbVsyLDJdL3N1bShjbVsyLF0pCnJlY2FsbCA8LSBjbVsyLDJdL3N1bShjbVssMl0pCmYxIDwtIDIgKiBwcmVjaXNpb24gKiByZWNhbGwgLyAocHJlY2lzaW9uICsgcmVjYWxsKQoKcHJlY2lzaW9uCnJlY2FsbApmMQpgYGAKClRyYWluIG9uIHdob2xlIHRyYWluMiBhbmQgcHJlZGljdCB0ZXN0CmBgYHtyfQojIEJ1aWxkIHRoZSBtb2RlbAp0cmFpbjIkREFURVRJTUUgPC0gTlVMTAptb2RlbDIgPC0gcmFuZG9tRm9yZXN0KEFUVF9GTEFHfi4sIGRhdGE9dHJhaW4yKQpzdW1tYXJ5KG1vZGVsMikKdmFySW1wUGxvdChtb2RlbDIpCgojIFByZWRpY3Rpb24KcHJlZGljdDIgPC0gcHJlZGljdChtb2RlbDIsIG5ld2RhdGE9dGVzdCkKYGBgCgpTZWUgcGVyZm9ybWFuY2UKYGBge3J9CnRlc3QkQVRUX0ZMQUcgPC0gcHJlZGljdDIK
dGVzdC50cyA8LSB0cyh0ZXN0KQoKaWdub3JlID0gYygiTEVWRUxfVDUiLCAiRkxPV19QVTMiLCAiRkxPV19QVTUiLCAiRkxPV19QVTkiLCAiU1RBVFVTX1BVMyIsICJTVEFUVVNfUFU1IiwgIlNUQVRVU19QVTgiLCAiU1RBVFVTX1BVOSIpCnRlc3Quc21hbGwgPC0gdGVzdFsgLCAtd2hpY2gobmFtZXModGVzdCkgJWluJSBpZ25vcmUpXQp0ZXN0LnNtYWxsLnRzIDwtIHRzKHRlc3Quc21hbGwpCgpmb3IgKGNvbCBpbiBjb2xuYW1lcyh0ZXN0LnNtYWxsLnRzKSkgewogIGlmIChjb2wgIT0gIkRBVEVUSU1FIiAmIGNvbCAhPSAiQVRUX0ZMQUciKSB7CiAgICBwbG90LnRzKHRlc3Quc21hbGwudHNbLGNvbF0sIHlsYWI9Y29sLCBjb2w9YygiYmxhY2siKSkKICAgIHBhcihuZXcgPSBUUlVFKQogICAgcGxvdC50cyh0ZXN0LnNtYWxsLnRzWywiQVRUX0ZMQUciXSwgYXhlcz1GQUxTRSwgYnR5ID0gIm4iLCB4bGFiID0gIiIsIHlsYWIgPSAiIiwgY29sPSJyZWQiKQogIH0KfQpgYGAKCg==